Various bug fixes, and NMI/DF improvements for x86_64.
Signed-off-by: keir.fraser@cl.cam.ac.uk
.org 0x1000
ENTRY(idle_pg_table) # Initial page directory is 4kB
.org 0x2000
-ENTRY(cpu0_stack) # Initial stack is 8kB
- .org 0x4000
+ENTRY(cpu0_stack)
+ .org 0x2000 + STACK_SIZE
ENTRY(stext)
ENTRY(_stext)
identmap /* Too orangey for crows :-) */
.org 0x4000
-ENTRY(cpu0_stack) # Initial stack is 8kB
+ENTRY(cpu0_stack)
- .org 0x6000
+ .org 0x4000 + STACK_SIZE
ENTRY(stext)
ENTRY(_stext)
unsigned long cpu_initialized;
void __init cpu_init(void)
{
- extern void percpu_traps_init(void);
int nr = smp_processor_id();
struct tss_struct *t = &init_tss[nr];
+ unsigned char idt_load[10];
if ( test_and_set_bit(nr, &cpu_initialized) )
panic("CPU#%d already initialized!!!\n", nr);
printk("Initializing CPU#%d\n", nr);
- /* Set up GDT and IDT. */
SET_GDT_ENTRIES(current, DEFAULT_GDT_ENTRIES);
SET_GDT_ADDRESS(current, DEFAULT_GDT_ADDRESS);
__asm__ __volatile__ ( "lgdt %0" : "=m" (*current->arch.gdt) );
- __asm__ __volatile__ ( "lidt %0" : "=m" (idt_descr) );
+
+ *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
+ *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[nr];
+ __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
/* No nested task. */
__asm__ __volatile__ ( "pushf ; andw $0xbfff,(%"__OP"sp) ; popf" );
CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
#undef CD
- percpu_traps_init();
-
/* Install correct page table. */
write_ptbase(current);
void __init start_secondary(void)
{
unsigned int cpu = cpucount;
- /* 6 bytes suitable for passing to LIDT instruction. */
- unsigned char idt_load[6];
+ extern void percpu_traps_init(void);
extern void cpu_init(void);
set_current(idle_task[cpu]);
/*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
+ * At this point, boot CPU has fully initialised the IDT. It is
+ * now safe to make ourselves a private copy.
*/
+ idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+ memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
+
+ percpu_traps_init();
+
cpu_init();
+
smp_callin();
while (!atomic_read(&smp_commenced))
rep_nop();
- /*
- * At this point, boot CPU has fully initialised the IDT. It is
- * now safe to make ourselves a private copy.
- */
- idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
- memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
- *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
- *(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
- __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
-
/*
* low-memory mappings have been cleared, flush them from the local TLBs
* too.
return 0;
}
+unsigned long nmi_softirq_reason; /* Bit 0: parity error pending; bit 1: I/O error pending. */
+static void nmi_softirq(void) /* Forwards pending NMI reasons to dom0 as virtual IRQs. */
+{
+ if ( dom0 == NULL ) /* Nothing to notify if dom0 does not exist. */
+ return;
+
+ if ( test_and_clear_bit(0, &nmi_softirq_reason) ) /* Bit 0 -> VIRQ_PARITY_ERR. */
+ send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);
+
+ if ( test_and_clear_bit(1, &nmi_softirq_reason) ) /* Bit 1 -> VIRQ_IO_ERR. */
+ send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
+}
+
asmlinkage void mem_parity_error(struct xen_regs *regs)
{
- console_force_unlock();
- printk("\n\nNMI - MEMORY ERROR\n");
- fatal_trap(TRAP_nmi, regs);
+ /* Clear and disable the parity-error line. */
+ outb((inb(0x61)&15)|4,0x61);
+
+ switch ( opt_nmi[0] ) /* Dispatch on the first character of the nmi= option. */
+ {
+ case 'd': /* 'dom0' */
+ set_bit(0, &nmi_softirq_reason); /* Bit 0 is the parity-error reason read by nmi_softirq(). */
+ raise_softirq(NMI_SOFTIRQ); /* Intentional fall through to the 'ignore' break. */
+ case 'i': /* 'ignore' */
+ break;
+ default: /* 'fatal' */
+ console_force_unlock();
+ printk("\n\nNMI - MEMORY ERROR\n");
+ fatal_trap(TRAP_nmi, regs);
+ }
}
asmlinkage void io_check_error(struct xen_regs *regs)
{
- console_force_unlock();
+ /* Clear and disable the I/O-error line. */
+ outb((inb(0x61)&15)|8,0x61);
- printk("\n\nNMI - I/O ERROR\n");
- fatal_trap(TRAP_nmi, regs);
+ switch ( opt_nmi[0] ) /* Dispatch on the first character of the nmi= option. */
+ {
+ case 'd': /* 'dom0' */
+ set_bit(1, &nmi_softirq_reason); /* Bit 1 is the I/O-error reason; nmi_softirq() maps it to VIRQ_IO_ERR (bit 0 is parity). */
+ raise_softirq(NMI_SOFTIRQ); /* Intentional fall through to the 'ignore' break. */
+ case 'i': /* 'ignore' */
+ break;
+ default: /* 'fatal' */
+ console_force_unlock();
+ printk("\n\nNMI - I/O ERROR\n");
+ fatal_trap(TRAP_nmi, regs);
+ }
}
static void unknown_nmi_error(unsigned char reason)
{
++nmi_count(smp_processor_id());
-#if CONFIG_X86_LOCAL_APIC
if ( nmi_watchdog )
nmi_watchdog_tick(regs);
- else
-#endif
- unknown_nmi_error((unsigned char)(reason&0xff));
-}
-
-unsigned long nmi_softirq_reason;
-static void nmi_softirq(void)
-{
- if ( dom0 == NULL )
- return;
-
- if ( test_and_clear_bit(0, &nmi_softirq_reason) )
- send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);
- if ( test_and_clear_bit(1, &nmi_softirq_reason) )
- send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
+ if ( reason & 0x80 )
+ mem_parity_error(regs);
+ else if ( reason & 0x40 )
+ io_check_error(regs);
+ else if ( !nmi_watchdog )
+ unknown_nmi_error((unsigned char)(reason&0xff));
}
asmlinkage int math_state_restore(struct xen_regs *regs)
void __init trap_init(void)
{
- extern void doublefault_init(void);
- doublefault_init();
+ extern void percpu_traps_init(void);
+ extern void cpu_init(void);
/*
* Note that interrupt gates are always used, rather than trap gates. We
/* CPU0 uses the master IDT. */
idt_tables[0] = idt_table;
- /*
- * Should be a barrier for any external CPU state.
- */
- {
- extern void cpu_init(void);
- cpu_init();
- }
+ percpu_traps_init();
+
+ cpu_init();
open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
# Okay, its almost a normal NMI tick. We can only process it if:
# A. We are the outermost Xen activation (in which case we have
# the selectors safely saved on our stack)
- # B. DS-GS all contain sane Xen values.
+ # B. DS and ES contain sane Xen values.
# In all other cases we bail without touching DS-GS, as we have
# interrupted an enclosing Xen activation in tricky prologue or
# epilogue code.
orb $0x4,%al
outb %al,$0x61
cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore
- je restore_all_xen
+ je nmi_out
bts $0,%ss:SYMBOL_NAME(nmi_softirq_reason)
bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat)
cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0
- je restore_all_xen
+ je nmi_out
movl $(__HYPERVISOR_DS),%edx # nmi=fatal
movl %edx,%ds
movl %edx,%es
push %edx
call SYMBOL_NAME(mem_parity_error)
addl $4,%esp
- jmp ret_from_intr
+nmi_out:movl %ss:XREGS_eflags(%esp),%eax
+ movb %ss:XREGS_cs(%esp),%al
+ testl $(3|X86_EFLAGS_VM),%eax
+ jz restore_all_xen
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
+ GET_CURRENT(%ebx)
+ jmp test_all_events
nmi_io_err:
# Clear and disable the I/O-error line
orb $0x8,%al
outb %al,$0x61
cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore
- je restore_all_xen
+ je nmi_out
bts $1,%ss:SYMBOL_NAME(nmi_softirq_reason)
bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat)
cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0
- je restore_all_xen
+ je nmi_out
movl $(__HYPERVISOR_DS),%edx # nmi=fatal
movl %edx,%ds
movl %edx,%es
push %edx
call SYMBOL_NAME(io_check_error)
addl $4,%esp
- jmp ret_from_intr
+ jmp nmi_out
ENTRY(setup_vm86_frame)
__asm__ __volatile__ ( "hlt" );
}
-void __init doublefault_init(void)
+void __init percpu_traps_init(void)
{
- /*
- * Make a separate task for double faults. This will get us debug output if
- * we blow the kernel stack.
- */
- struct tss_struct *tss = &doublefault_tss;
- memset(tss, 0, sizeof(*tss));
- tss->ds = __HYPERVISOR_DS;
- tss->es = __HYPERVISOR_DS;
- tss->ss = __HYPERVISOR_DS;
- tss->esp = (unsigned long)
- &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
- tss->__cr3 = __pa(idle_pg_table);
- tss->cs = __HYPERVISOR_CS;
- tss->eip = (unsigned long)do_double_fault;
- tss->eflags = 2;
- tss->bitmap = IOBMP_INVALID_OFFSET;
- _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
- (unsigned long)tss, 235, 9);
+ if ( smp_processor_id() == 0 )
+ {
+ /*
+ * Make a separate task for double faults. This will get us debug
+ * output if we blow the kernel stack.
+ */
+ struct tss_struct *tss = &doublefault_tss;
+ memset(tss, 0, sizeof(*tss));
+ tss->ds = __HYPERVISOR_DS;
+ tss->es = __HYPERVISOR_DS;
+ tss->ss = __HYPERVISOR_DS;
+ tss->esp = (unsigned long)
+ &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
+ tss->__cr3 = __pa(idle_pg_table);
+ tss->cs = __HYPERVISOR_CS;
+ tss->eip = (unsigned long)do_double_fault;
+ tss->eflags = 2;
+ tss->bitmap = IOBMP_INVALID_OFFSET;
+ _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
+ (unsigned long)tss, 235, 9);
+ }
set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
}
-void __init percpu_traps_init(void)
-{
-}
-
long set_fast_trap(struct exec_domain *p, int idx)
{
trap_info_t *ti;
#include <public/xen.h>
ENTRY(hypercall)
- movl $0x0833,8(%rsp)
+ movl $__GUEST_SS,8(%rsp)
pushq %r11
- pushq $0x082b
+ pushq $__GUEST_CS
pushq %rcx
pushq $0
SAVE_ALL
jmp error_code
ENTRY(nmi)
- iretq
+ pushq $0
+ SAVE_ALL
+ inb $0x61,%al
+ movl %eax,%esi # reason
+ movq %rsp,%rdi # regs: pass the full 64-bit stack pointer (a 32-bit move would zero the upper half)
+ call SYMBOL_NAME(do_nmi)
+ jmp restore_all_xen
.data
printk(" L1 = %p\n", page);
}
-#define DOUBLEFAULT_STACK_SIZE 1024
-static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
asmlinkage void double_fault(void);
-
asmlinkage void do_double_fault(struct xen_regs *regs)
{
/* Disable the NMI watchdog. It's useless now. */
/* Find information saved during fault and dump it to the console. */
printk("************************************\n");
- printk("EIP: %04lx:[<%p>] \nEFLAGS: %p\n",
- 0xffff & regs->cs, regs->rip, regs->eflags);
- printk("rax: %p rbx: %p rcx: %p rdx: %p\n",
- regs->rax, regs->rbx, regs->rcx, regs->rdx);
- printk("rsi: %p rdi: %p rbp: %p rsp: %p\n",
- regs->rsi, regs->rdi, regs->rbp, regs->rsp);
- printk("r8: %p r9: %p r10: %p r11: %p\n",
- regs->r8, regs->r9, regs->r10, regs->r11);
- printk("r12: %p r13: %p r14: %p r15: %p\n",
- regs->r12, regs->r13, regs->r14, regs->r15);
+ show_registers(regs);
printk("************************************\n");
- printk("CPU%d DOUBLE FAULT -- system shutdown\n",
- logical_smp_processor_id());
+ printk("CPU%d DOUBLE FAULT -- system shutdown\n", smp_processor_id());
printk("System needs manual reset.\n");
printk("************************************\n");
__asm__ __volatile__ ( "hlt" );
}
-void __init doublefault_init(void)
-{
- int i;
-
- /* Initialise IST1 for each CPU. Note the handler is non-reentrant. */
- for ( i = 0; i < NR_CPUS; i++ )
- init_tss[i].ist[0] = (unsigned long)
- &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
-
- /* Set interrupt gate for double faults, specifying IST1. */
- set_intr_gate(TRAP_double_fault, &double_fault);
- idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
-}
-
asmlinkage void hypercall(void);
void __init percpu_traps_init(void)
{
char *stack_top = (char *)get_stack_top();
char *stack = (char *)((unsigned long)stack_top & ~(STACK_SIZE - 1));
+ int cpu = smp_processor_id();
+
+ /* Double-fault handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[0] = (unsigned long)&stack[1024];
+ set_intr_gate(TRAP_double_fault, &double_fault);
+ idt_tables[cpu][TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+
+ /* NMI handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[1] = (unsigned long)&stack[2048];
+ idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; /* IST2 */
+
+ /*
+ * Trampoline for SYSCALL entry from long mode.
+ */
+
+ /* Skip the NMI and DF stacks. */
+ stack = &stack[2048];
+ wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
/* movq %rsp, saversp(%rip) */
stack[0] = 0x48;
stack[14] = 0xe9;
*(u32 *)&stack[15] = (char *)hypercall - &stack[19];
+ /*
+ * Trampoline for SYSCALL entry from compatibility mode.
+ */
+
+ /* Skip the long-mode entry trampoline. */
+ stack = &stack[19];
+ wrmsr(MSR_CSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
+
+ /* movq %rsp, saversp(%rip) */
+ stack[0] = 0x48;
+ stack[1] = 0x89;
+ stack[2] = 0x25;
+ *(u32 *)&stack[3] = (stack_top - &stack[7]) - 16;
+
+ /* leaq saversp(%rip), %rsp */
+ stack[7] = 0x48;
+ stack[8] = 0x8d;
+ stack[9] = 0x25;
+ *(u32 *)&stack[10] = (stack_top - &stack[14]) - 16;
+
+ /* jmp hypercall */
+ stack[14] = 0xe9;
+ *(u32 *)&stack[15] = (char *)hypercall - &stack[19];
+
+ /*
+ * Common SYSCALL parameters.
+ */
+
wrmsr(MSR_STAR, 0, (FLAT_RING3_CS64<<16) | __HYPERVISOR_CS);
- wrmsr(MSR_LSTAR, (unsigned long)stack, ((unsigned long)stack>>32));
- wrmsr(MSR_SYSCALL_MASK, 0xFFFFFFFFU, 0U);
+ wrmsr(MSR_SYSCALL_MASK, ~EF_IE, 0U); /* disable interrupts */
}
void *decode_reg(struct xen_regs *regs, u8 b)
#define __HYPERVISOR_DS32 0x0818
#define __HYPERVISOR_DS __HYPERVISOR_DS64
+#define __GUEST_CS 0x082b
+#define __GUEST_DS 0x0000
+#define __GUEST_SS 0x0833
+
/* For generic assembly code: use macros to define operation/operand sizes. */
#define __OS "q" /* Operation Suffix */
#define __OP "r" /* Operand Prefix */